package mosdi.subcommands;

import mosdi.fa.FiniteMemoryTextModel;
import mosdi.util.Alphabet;
import mosdi.util.Log;
import mosdi.util.SequenceUtils;
import mosdi.util.iterators.LexicographicalIterator;
import mosdi.util.iterators.StringIterator;

public class QgramExpectations extends Subcommand {
	
	@Override
	public String usage() {
		return
		super.usage()+" [options] <text-model> <q>\n" +
		"\n" +
		"<text-model>: file containing a table of qgram frequencies to be used\n" +
		"              to estimate a text model. The order of the text model is\n" +
		"              determined by the length of q-grams in this file.\n" +
		"\n" +
		"Options:\n" +
		"  -i: round to integers\n" +
		"  -l <length>: give expectation for text of this length (default: 10.000.000)";
	}
	
	@Override
	public String description() {
		return "Computes expectation of each q-gram w.r.t. to a given text model.";
	}

	@Override
	public String name() {
		return "qgram-expectations";
	}
	
	@Override
	public int run(String[] args) {
		parseOptions(args, 2, "il:");

		// Option dependencies
		// -- none --

		// Mandatory arguments
		String textModelFilename = getStringArgument(0);
		int q = getIntArgument(1);

		// Options
		boolean roundToInts = getBooleanOption("i", false);
		long textLength = getNonNegativeLongOption("l", 10000000);
	
		if (textLength<q) {
			Log.errorln("Error: text length must not be smaller than q.");
			System.exit(1);
		}
		
		Alphabet dnaAlphabet = Alphabet.getDnaAlphabet();
		FiniteMemoryTextModel textModel = null;
		try {
			textModel = SequenceUtils.buildTextModelFromQGramFile(textModelFilename);
		} catch (Exception e) {
			Log.errorln(e.toString());
			System.exit(1);
		}
		
		LexicographicalIterator iterator = new StringIterator(dnaAlphabet.size(), q);
		while (iterator.hasNext()) {
			int[] s = iterator.next();
			double e = textModel.expectation(s) * (textLength-q+1);
			if (roundToInts) {
				Log.printf(Log.Level.STANDARD, "%s\t%d\n", dnaAlphabet.buildString(s), (long)Math.round(e));
			} else {
				Log.printf(Log.Level.STANDARD, "%s\t%f\n", dnaAlphabet.buildString(s), e);
			}
		}
		
		return 0;
	}

}
